# ----------------------------------------------------------------------
#  Project: "Prosocial Behavior amid Violence -- Study 1"
#  Journal: Political Psychology
#  Last updated: "Aug 13 2023"
# ----------------------------------------------------------------------

# Clear objects left over from earlier interactive work.
# Note: this removes workspace objects only; attached packages and options()
# are untouched, so a fresh R session is still the safest way to replicate.
rm(list = ls())

# set wd ----
# Path to the replication folder (the one holding data_study1.rds and
# letter_messages_only_english.xlsx). Leave empty to use the directory R was
# started in. The previous unconditional setwd("") stopped the script with
# "cannot change working directory".
project_dir <- ""
if (nzchar(project_dir)) {
  setwd(project_dir)
}

# Loading Necessary Libraries ----
library(haven)
library(tidyverse)
library(estimatr)
library(coefplot)
library(stdidx)
library(mediation)
library(scales)
library(texreg)
library(modelsummary)


# Load the Study 1 data ----
# The file is resolved relative to the current working directory.
d <- readRDS("data_study1.rds")

#### Correlational analysis ------------
# Every outcome is regressed on the same right-hand side, so the specification
# is written once and reused. The order of the terms is the order in which the
# coefficients are estimated and reported.
rhs_terms <- c(
  "w1_mech_main_3",                      # locus of outgroup control
  "identif_victims_4", "empathy_baseline_2",
  "educ", "factor(ethnic)", "ideology_1",
  "indirect_victim", "trust_ffaa_1",
  "PRI", "PRD", "Morena", "PAN",
  "SELs", "Gender", "Ages", "factor(GEO)"
)

# Table column label -> outcome variable in `d`.
outcomes <- c(
  "Letter of Support"   = "w1_letter_interest",          # letter
  "Feeling Thermometer" = "w1_feeling_therm_1_rescale",  # feeling therm
  "Social Distance"     = "z_dist_rescale",              # social distance
  "Empathy"             = "compassion_rescale",          # empathy
  "Social Norms"        = "norms_rescale"                # social norms
)

# OLS with HC0 heteroskedasticity-robust standard errors, one fit per outcome.
# Each fit is stored under its table label and printed as it is estimated.
models <- list()
for (label in names(outcomes)) {
  model_formula <- reformulate(rhs_terms, response = outcomes[[label]])
  models[[label]] <- lm_robust(model_formula, se_type = "HC0", data = d)
  print(models[[label]])
}

# Extra row appended to the regression table via modelsummary's `add_rows`:
# one label cell followed by one cell per model (five models -> six columns).
# An earlier three-column placeholder definition of `rows` was removed; it was
# overwritten immediately and never used.
rows <- tibble::tribble(~`Region FEs`, ~` `, ~` `, ~` `, ~` `,  ~` `,
                        'Region FEs', 'Yes', 'Yes', 'Yes', 'Yes',  'Yes')

# shorter version: same models (covariates included in the fits), but only the
# Controllability coefficient is displayed because coef_map keeps just the
# terms it lists.
modelsummary(models,
             coef_map = c("w1_mech_main_3" = "Controllability"),
             stars = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
             output = "latex")

#### Table 2
# longer version (w/ covariates)
# coef_map both relabels and selects: only the terms listed below are shown,
# in this order.
# NOTE(review): the party indicators (PRI, PRD, Morena, PAN) are in the models
# but not in coef_map, so they do not appear in the table -- confirm that this
# is intended.
modelsummary(models, stars = TRUE,
             # coef_omit is a regular expression, so the parentheses must be
             # escaped to match the literal "factor(GEO)..." term names.
             coef_omit = "factor\\(GEO\\)",
             coef_map = c("w1_mech_main_3" = "Controllability",
                          "identif_victims_4" = "Identification with Victims",
                          "empathy_baseline_2" = "Baseline Empathy",
                          "educ" = "Education",
                          "ideology_1" = "Ideology",
                          # Key must equal the model term `indirect_victim`;
                          # the former misspelling dropped this row entirely.
                          "indirect_victim" = "Indirect Victim",
                          "trust_ffaa_1" = "Trust in Armed Forces",
                          "SELs" = "Socioeconomic Status",
                          "GenderFemale" = "Gender (Female = 1)",
                          "Ages" = "Age",
                          "factor(ethnic)Mixed" = "Ethnicity (Mixed)",
                          "factor(ethnic)Indigenous" = "Ethnicity (Indigenous)",
                          "factor(ethnic)Black" = "Ethnicity (Black)",
                          "factor(ethnic)Other" = "Ethnicity (Other)"),
             gof_omit = "R2", add_rows = rows, output = "latex")


#### Text analysis - Fig. 1 -----

## STEP 1: Loading the packages and retrieving the data

library(wordcloud)
library(RColorBrewer)
library(wordcloud2)
library(quanteda.textstats)
library(quanteda)
library(tm)
library(readxl)

# Create a character vector containing only the message text.
# read_xlsx() returns a tibble; passing that straight to iconv() coerces it
# with as.character(), which yields one deparsed 'c("...", "...")' string per
# column instead of the messages themselves. Flatten the cells first.
# NOTE(review): read_xlsx() treats the first spreadsheet row as a header by
# default -- confirm the file has one, otherwise the first message is skipped.
letters_raw <- read_xlsx("letter_messages_only_english.xlsx")
text <- unlist(lapply(letters_raw, as.character), use.names = FALSE)
text <- iconv(text, to = "UTF-8")
# Drop empty cells and anything iconv() could not convert (returned as NA).
text <- text[!is.na(text) & nzchar(text)]

# Create a corpus with one document per message
docs <- Corpus(VectorSource(text))


## STEP 2: Clean the text data

# Project-specific filler words removed in addition to the standard English
# stopword list. ("will" previously carried a leading space, so it was only
# matched when preceded by another word.)
extra_stopwords <- c("people", "also", "will", "like", "due", "one")

# An earlier Spanish-language variant instead removed
# c("gente", "personas", "usted", "así", "creo", stopwords("spanish")).

# Order matters:
#   1. lower-case first, because the stopword list is lower-case;
#   2. remove stopwords while apostrophes are still present, so contractions
#      in the list (e.g. "don't", "i'm") actually match;
#   3. then strip punctuation and digits;
#   4. collapse the whitespace left behind by the removals last.
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, c(stopwords("english"), extra_stopwords)) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  tm_map(stripWhitespace)
## STEP 3: Build the term-document matrix and the word-frequency table

# Terms in rows, documents in columns.
dtm <- TermDocumentMatrix(docs)

# Dense copy of the matrix. The name `matrix` is kept as in the original
# script even though it shadows base::matrix.
matrix <- as.matrix(dtm)

# Total count of each term across all documents, most frequent first.
words <- matrix |>
  rowSums() |>
  sort(decreasing = TRUE)

# Two-column table (word, freq) in the layout passed to wordcloud2() below.
df <- data.frame(
  word = names(words),
  freq = words
)


## STEP 4: Draw the word cloud (Fig. 1)

# Fix R's random seed before plotting.
# NOTE(review): wordcloud2 produces an HTML widget; confirm that the R seed
# actually pins the word placement and the "random-dark" colours.
set.seed(1234)

wordcloud2(df, size = 1.6, color = "random-dark")
